Pages

Monday, December 14, 2015

Script to monitor Goldengate replication


## Please test this script in your environment before you deploy.

The script below monitors your Goldengate replication. It takes the following 3 input parameters -

ORACLE_SID   : Database instance name that is configured for Goldengate replication.
GOLDENGATE_HOME : Directory where Goldengate is installed.
LAG_THRESHOLD   : Lag threshold in minutes

The below script will alert if any one of the following happens -
a. if any of the Goldengate processes has abended or stopped.
b. if the replication latency is more than a value(in minutes) that is passed to the script as a parameter
c. if there are any errors/warnings in the goldengate log file (ggserr.log)

-- 

#!/bin/ksh
# Script      : monitoring_gg.sh
# Version     : 1.0
# Purpose     : To monitor Goldengate processes and latency
# Usage       : monitoring_gg.sh ORACLE_SID GOLDENGATE_HOME LAG_THRESHOLD
# Example     : monitoring_gg.sh DBNAME /u01/app/goldengate/11.2.1.0.17 30
#
# Parameters:
#   1. ORACLE_SID: Database Instance name. This parameter is used to set the appropriate DB environment variables. (NOTE: This is the Instance name not Database name.)
#   2. GOLDENGATE_HOME: Home directory of Goldengate installation
#   3. LAG_THRESHOLD: Allowed lag time in minutes
#
# ******************
# Functions
# ******************
# Usage function.
# show_usage: write the command synopsis and an example invocation to stdout,
# then terminate the script with exit status 1. Takes no arguments.
function show_usage {
    typeset script_path="$PWD/monitoring_gg.sh"

    # One printf call emits every line; each argument becomes one output line.
    printf '%s\n' \
        " " \
        "Usage: $script_path ORACLE_SID GOLDENGATE_HOME LAG_THRESHOLD" \
        "   ORACLE_SID  : Name of the Database instance that is configured for Goldengate replication. " \
        "   GOLDENGATE_HOME : Directory where Goldengate is installed. " \
        "   LAG_THRESHOLD   : Lag threshold in minutes" \
        "Example: $script_path DBNAME /u01/app/goldengate/11.2.1.0.17 30" \
        " "
    exit 1
}

# **************************************
# Input parameter validation
# **************************************
# All three positional parameters are mandatory; if any is missing or
# malformed, show_usage prints the synopsis and exits with status 1.

# $1 -> ORACLE_SID, folded to upper case (as this script has always done).
# The bracketed ranges are kept on purpose: they presumably exist so that the
# same command works with both System V and POSIX flavours of tr.
if [ -n "$1" ]
then
   ORACLE_SID=$(printf '%s\n' "$1" | tr "[a-z]" "[A-Z]"); export ORACLE_SID
else
   show_usage
fi

# $2 -> GOLDENGATE_HOME, taken verbatim (quoted so that whitespace or glob
# characters in the path are preserved instead of being expanded by echo).
if [ -n "$2" ]
then
   GOLDENGATE_HOME=$2; export GOLDENGATE_HOME
else
   show_usage
fi

# $3 -> LAG_THRESHOLD in minutes. It is later used in a numeric comparison,
# so anything other than a non-negative integer is rejected here.
case "$3" in
   ''|*[!0-9]*)
      show_usage
      ;;
   *)
      LAG_THRESHOLD=$3; export LAG_THRESHOLD
      ;;
esac

# **************************************
# Setting up the environment
# **************************************
# Source the site-wide oraenv script with ORAENV_ASK=NO, then clear the flag
# again. NOTE(review): oraenv is expected to set ORACLE_HOME for the exported
# ORACLE_SID without prompting - confirm against the local oraenv.
ORAENV_ASK=NO; export ORAENV_ASK
. /usr/local/bin/oraenv
ORAENV_ASK= ; export ORAENV_ASK

# Library and executable search paths used when running ggsci.
export LD_LIBRARY_PATH="$ORACLE_HOME/lib:$GOLDENGATE_HOME"
export PATH="$GOLDENGATE_HOME:$PATH"

# Recipient(s) of every alert mail sent by this script.
export MAIL_LIST=abcd@efgh.com

# **************************************
# Gather Goldengate information
# **************************************
# Run "info all" through ggsci from inside the Goldengate home and capture the
# output in /tmp/monitoring_gg.log, which the process/lag checks read.
# Fail loudly instead of continuing with an empty status file: without it the
# process checks would find nothing to report and stay silent.
cd "$GOLDENGATE_HOME" || {
    echo "monitoring_gg.sh: cannot change directory to $GOLDENGATE_HOME" >&2
    exit 1
}

if [ ! -x "$GOLDENGATE_HOME/ggsci" ]
then
    echo "monitoring_gg.sh: $GOLDENGATE_HOME/ggsci not found or not executable" >&2
    exit 1
fi

# NOTE(review): the output file has a fixed, predictable name in /tmp; it is
# kept because the monitoring step reads exactly this path.
"$GOLDENGATE_HOME/ggsci" -s << EOF > /tmp/monitoring_gg.log
info all
exit
EOF

# ********************************************
# Monitoring Goldengate processes and lag time
# ********************************************
# File holding the captured "info all" output.
GG_INFO_FILE=/tmp/monitoring_gg.log

# gg_lag_minutes HH MM
#   Prints HH*60+MM on stdout. Returns 1 (printing nothing) when either value
#   is not a plain non-negative integer.
#   expr is used deliberately: it reads "08"/"09" as decimal, whereas shell
#   arithmetic may treat a leading zero as octal.
function gg_lag_minutes {
    case "$1" in ''|*[!0-9]*) return 1 ;; esac
    case "$2" in ''|*[!0-9]*) return 1 ;; esac
    expr "$1" \* 60 + "$2"
    # expr exits 1 when the result is 0; that is not an error here.
    return 0
}

# gg_send_alert SUBJECT REPORT_FILE
#   Mails REPORT_FILE to $MAIL_LIST with the given subject. If the report is
#   missing or unreadable a one-line body is sent instead, so the alert is not
#   lost to a failed input redirection.
#   $MAIL_LIST is intentionally unquoted so it may hold several recipients.
function gg_send_alert {
    if [ -r "$2" ]
    then
        mailx -s "$1" $MAIL_LIST < "$2"
    else
        mailx -s "$1" $MAIL_LIST << EOF
Report file $2 is not available.
EOF
    fi
}

# gg_check_all
#   Reads "info all" lines on stdin with every ':' already turned into a
#   blank, i.e. fields are: TYPE STATUS [NAME LAG_HH LAG_MM LAG_SS CKPT_HH
#   CKPT_MM CKPT_SS]. Lines whose first field is not a known process type are
#   ignored.
#     MANAGER not RUNNING           -> ALERT mail, stop, return 1
#     JAGENT not RUNNING            -> WARNING mail
#     EXTRACT/REPLICAT not RUNNING  -> ALERT mail
#     EXTRACT/REPLICAT lag >= $LAG_THRESHOLD minutes (hours included)
#                                   -> ALERT mail
#   Returns 0 unless the manager is down.
function gg_check_all {
    GG_HOST=`uname -n`

    while read -r PROCESS_TYPE PROCESS_STATUS PROCESS_NAME LAG_HH LAG_MM LAG_SS CKPT_HH CKPT_MM CKPT_SS GG_REST
    do
        case "$PROCESS_TYPE" in
        MANAGER)
            if [ "$PROCESS_STATUS" != "RUNNING" ]
            then
                SUBJECT="ALERT ... Goldengate process \"$PROCESS_TYPE\" is $PROCESS_STATUS on ${GG_HOST}(${ORACLE_SID})"
                gg_send_alert "$SUBJECT" "$GOLDENGATE_HOME/dirrpt/MGR.rpt"
                # Nothing else is checked once the manager is known to be down.
                return 1
            fi
            ;;
        JAGENT)
            if [ "$PROCESS_STATUS" != "RUNNING" ]
            then
                SUBJECT="WARNING ... Goldengate process \"$PROCESS_TYPE\" is $PROCESS_STATUS on ${GG_HOST}"
                gg_send_alert "$SUBJECT" "$GOLDENGATE_HOME/dirrpt/JAGENT.rpt"
            fi
            ;;
        EXTRACT|REPLICAT)
            if [ "$PROCESS_STATUS" != "RUNNING" ]
            then
                SUBJECT="ALERT ... Goldengate process \"${PROCESS_TYPE}(${PROCESS_NAME})\" is $PROCESS_STATUS on ${GG_HOST}(${ORACLE_SID})"
                gg_send_alert "$SUBJECT" "$GOLDENGATE_HOME/dirrpt/${PROCESS_NAME}.rpt"
            else
                GG_LAG_MIN=`gg_lag_minutes "$LAG_HH" "$LAG_MM"` || {
                    echo "monitoring_gg.sh: cannot parse lag \"$LAG_HH:$LAG_MM\" for $PROCESS_NAME" >&2
                    continue
                }
                # Compare the whole lag in minutes so that thresholds of an
                # hour or more are honoured.
                if [ "$GG_LAG_MIN" -ge "$LAG_THRESHOLD" ]
                then
                    SUBJECT="ALERT ... Goldengate process \"${PROCESS_TYPE}(${PROCESS_NAME})\" has a lag of $LAG_HH hour $LAG_MM min on ${GG_HOST}(${ORACLE_SID})"
                    gg_send_alert "$SUBJECT" "$GOLDENGATE_HOME/dirrpt/${PROCESS_NAME}.rpt"
                fi
            fi
            ;;
        esac
    done
    return 0
}

if [ -r "$GG_INFO_FILE" ]
then
    # HH:MM:SS columns are split into separate fields by replacing ':'.
    # A non-zero result means the manager is down: stop the script here.
    tr ":" " " < "$GG_INFO_FILE" | gg_check_all || exit 1
else
    echo "monitoring_gg.sh: cannot read $GG_INFO_FILE; process checks skipped" >&2
fi

# **************************************
# Monitoring Goldengate Error log
# **************************************

# gg_check_error_log GG_HOME
#   Mails ERROR/WARNING lines that have appeared in GG_HOME/ggserr.log since
#   the previous run (at most the last ten of them) to $MAIL_LIST.
#   State kept between runs, all inside GG_HOME:
#     ggserr.monitor      ERROR/WARNING lines seen by this run
#     ggserr.monitor.old  the same list from the previous run
#   ggserr.diff and ggserr.diff.1 are scratch files removed before returning.
#   Does nothing when ggserr.log is not readable.
#   The GG_ERROR_* variables are left global.
function gg_check_error_log {
    GG_ERROR_FILE=$1/ggserr.log
    GG_ERROR_MNTR=$1/ggserr.monitor
    GG_ERROR_DIFF=$1/ggserr.diff

    # Check first, so no scratch file is created (and left behind) when there
    # is no log to examine.
    [[ -r ${GG_ERROR_FILE} ]] || return 0

    # Keep the previous snapshot, then take a new one.
    touch "$GG_ERROR_MNTR"
    cp "$GG_ERROR_MNTR" "$GG_ERROR_MNTR.old"
    egrep "ERROR|WARNING" "$GG_ERROR_FILE" > "$GG_ERROR_MNTR"

    # Lines present only in the new snapshot. Deciding on the diff itself,
    # rather than on line counts, also catches a purged/rotated log whose new
    # error count happens not to exceed the old one.
    diff "$GG_ERROR_MNTR" "$GG_ERROR_MNTR.old" | grep "^<" | sed -e 's/^< //' > "$GG_ERROR_DIFF"

    if [[ -s ${GG_ERROR_DIFF} ]]
    then
        # Report only the last ten new entries.
        tail -10 "$GG_ERROR_DIFF" > "$GG_ERROR_DIFF.1"
        SUBJECT="WARNING .. Errors encountered in Goldengate replication on `uname -n`($ORACLE_SID)"
        # $MAIL_LIST is intentionally unquoted so it may hold several recipients.
        mailx -s "$SUBJECT" $MAIL_LIST < "$GG_ERROR_DIFF.1"
    fi
    rm -f "$GG_ERROR_DIFF" "$GG_ERROR_DIFF.1"
}

gg_check_error_log "$GOLDENGATE_HOME"

No comments:

Post a Comment