Thanks.
Code: Select all
*/2 * * * * /opt/home/cl/cron_for_aws
# --- NSCA submission settings ---------------------------------------------
# Port and host passed to send_nsca via -p / -H below.
# NOTE(review): "ip" looks like a placeholder for the real master address.
MASTER_PORT_A=5667
MASTER_HOST_AWS=ip
# Directory holding the check plugins and the send_nsca binaries.
ROOT=~cloverleaf/libexec
NSCA=${ROOT}/send_nsca
NSCA_AWS=${ROOT}/send_nsca_1.9
# Kept as one string on purpose: it is expanded unquoted further down so the
# shell splits it into separate send_nsca arguments.
NSCA_AWS_CMD="-H ${MASTER_HOST_AWS} -p ${MASTER_PORT_A} -c ${NSCA}.cfg"
ECHO="/bin/echo -e"
# Create the scratch file with mktemp instead of a predictable /tmp name built
# from the PID, so another local user cannot pre-create or symlink it.
tempfile=$(mktemp /tmp/cldp-nsca-aws.XXXXXX) || exit 1
# Clean up on every exit path. -f keeps rm quiet if the file is already gone;
# single quotes defer expansion of $tempfile until the trap actually fires.
trap 'rm -f "$tempfile"' 0
# On HUP/INT/TERM stop the script (which then runs the exit trap above)
# instead of deleting the file and carrying on with the remaining checks.
trap 'exit 1' 1 2 15
# these must get changed per host and per service
this_host='saturn'
# format of the file must be
# hostname_in_nagios service_name_in_nagios status extended
# (one result per line, fields separated by tabs)
# check for mysql replication
# -u and -p must be separate words: written as "-ucl-ppass" the whole string
# is taken as the user name and no password is sent.
x=$("${ROOT}/check_mysql" -ucl -ppass -S)
# Must be read immediately: this is the plugin's exit code, which is what
# gets reported as the service state.
status=$?
x=${x##REPLICATION }
# printf with quoted arguments writes the plugin text verbatim: no glob
# expansion, no whitespace collapsing, and no backslash interpretation of the
# plugin output (which "echo -e" on an unquoted variable would do).
# The former debug "echo $x" was dropped: under cron it produced output on
# every run, unlike every other check in this script.
printf '%s\t%s\t%s\t%s\n' "$this_host" mysql_replication "$status" "$x" > "$tempfile"
# $NSCA_AWS_CMD is intentionally unquoted so it splits into arguments.
$NSCA_AWS $NSCA_AWS_CMD < "$tempfile"
# check for mysql at ALL
x=$("${ROOT}/check_mysql" -ucl -ppass)
# Plugin exit code, captured before any other command can overwrite $?.
status=$?
# Strip a leading "MYSQL " and then a leading "OK " from the plugin text, if
# present; anything else is passed through unchanged.
x=${x##MYSQL }
message=${x##OK }
# Quoted printf instead of an unquoted "echo -e": the plugin text is written
# verbatim (no globbing, no whitespace collapsing, no backslash escapes).
printf '%s\t%s\t%s\t%s\n' "$this_host" mysql_status "$status" "$message" > "$tempfile"
# $NSCA_AWS_CMD is intentionally unquoted so it splits into arguments.
$NSCA_AWS $NSCA_AWS_CMD < "$tempfile"
# check for disk space
x=$("${ROOT}/check_disk" -w 10% -c 5% -p /)
# Plugin exit code, captured before any other command can overwrite $?.
status=$?
x=${x##DISK }
# Keeps only the text after the last space in the plugin output.
message=${x##* }
# The status field is written without the stray leading space the old line
# template had ("\t ${status}"), matching the other checks in this script.
# Quoted printf writes the text verbatim (no globbing or escape handling).
printf '%s\t%s\t%s\t%s\n' "$this_host" disk_space "$status" "$message" > "$tempfile"
# $NSCA_AWS_CMD is intentionally unquoted so it splits into arguments.
$NSCA_AWS $NSCA_AWS_CMD < "$tempfile"
# check for /opt space
x=$("${ROOT}/check_disk" -w 10% -c 5% -p /opt)
# Plugin exit code, captured before any other command can overwrite $?.
status=$?
x=${x##DISK }
# Keeps only the text after the last space in the plugin output.
message=${x##* }
# The status field is written without the stray leading space the old line
# template had ("\t ${status}"), matching the other checks in this script.
# Quoted printf writes the text verbatim (no globbing or escape handling).
printf '%s\t%s\t%s\t%s\n' "$this_host" opt_space "$status" "$message" > "$tempfile"
# $NSCA_AWS_CMD is intentionally unquoted so it splits into arguments.
$NSCA_AWS $NSCA_AWS_CMD < "$tempfile"
# check for file
# Age check on a sentinel file: warning at 600, critical at 900 (the values
# handed to check_file_age's -w / -c options).
MAGIC_FILE=~cl/html/images/SENTINEL
x=$("${ROOT}/check_file_age" -w 600 -c 900 -f "${MAGIC_FILE}")
# Plugin exit code, captured before any other command can overwrite $?.
status=$?
# Keeps only the text after the last "-" in the plugin output.
message=${x##*-}
# Quoted printf instead of an unquoted "echo -e": the plugin text is written
# verbatim (no globbing, no whitespace collapsing, no backslash escapes).
printf '%s\t%s\t%s\t%s\n' "$this_host" check_rsync "$status" "$message" > "$tempfile"
# $NSCA_AWS_CMD is intentionally unquoted so it splits into arguments.
$NSCA_AWS $NSCA_AWS_CMD < "$tempfile"
# check for activity
# needs to be in the ginger directory
# NOTE(review): the comment above says "ginger" but the path is riviera —
# confirm which one is current.
# The cd stays at script level (not in a subshell) so the working directory
# for whatever follows is the same as before. If the cd fails, the PHP check
# is not run from the wrong directory; status 3 is submitted instead
# (3 = UNKNOWN in the Nagios plugin return-code convention).
if cd ~cl/html/riviera/; then
    x=$(/usr/bin/php ~cl/html/riviera/ck_activity.php)
    # Exit code of the PHP script, captured before $? is overwritten.
    status=$?
    # Keeps only the text after the last "-" in the script output.
    message=${x##*-}
else
    status=3
    message="cannot cd to ~cl/html/riviera/"
fi
# Quoted printf instead of an unquoted "echo -e": the text is written
# verbatim (no globbing, no whitespace collapsing, no backslash escapes).
printf '%s\t%s\t%s\t%s\n' "$this_host" web_active "$status" "$message" > "$tempfile"
# $NSCA_AWS_CMD is intentionally unquoted so it splits into arguments.
$NSCA_AWS $NSCA_AWS_CMD < "$tempfile"
# check for News update
x=$(/usr/bin/php ~cl/libexec/saturn-mysql-check.php)
# Exit code of the PHP script, captured before $? is overwritten.
status=$?
# Keeps only the text after the last "-" in the script output.
message=${x##*-}
# Quoted printf instead of an unquoted "echo -e": the text is written
# verbatim (no globbing, no whitespace collapsing, no backslash escapes).
# The result is only staged in $tempfile here; the send_nsca call that
# submits it is the next command in the script.
printf '%s\t%s\t%s\t%s\n' "$this_host" news_update "$status" "$message" > "$tempfile"
$NSCA_AWS $NSCA_AWS_CMD < $tempfile

(Additional information added by Jesse):
-Upgraded recently from CentOS 5 to CentOS 7
-Upgraded recently from Nagios Core 3.x to a higher version of 3.x
-send_nsca was updated
-send_nsca is running via cron every minute
-The issue affects the four servers below, all of which have these checks; other passive checks are not alerting:
Code: Select all
# Passive services for host mercury. Each inherits from the "passive-check"
# template, which is defined outside this excerpt. The service_description
# values are the service names results must be submitted under.
define service{
    use                  passive-check
    host_name            mercury
    service_description  disk_space
}

define service{
    use                  passive-check
    host_name            mercury
    service_description  opt_space
}

define service{
    use                  passive-check
    host_name            mercury
    service_description  mysql_status
}

define service{
    use                  passive-check
    host_name            mercury
    service_description  mysql_replication
}

# "register 0" marks this definition as not registered, so per the Nagios
# object-inheritance rules web_active is not an active service on mercury.
define service{
    use                  passive-check
    host_name            mercury
    service_description  web_active
    register             0
}

define service{
    use                  passive-check
    host_name            mercury
    service_description  check_rsync
}
# Actively checked services for host mercury. Both inherit from the
# "12-hour-active-check" template, which is defined outside this excerpt.
# check_command arguments are separated by "!".
define service{
    use                  12-hour-active-check
    host_name            mercury
    service_description  Awstats Copy
    check_command        check_file_age!90000!180000!/var/www/awstats/digeo-mercury
}

# "register 0" marks this definition as not registered, so per the Nagios
# object-inheritance rules Log Schlep is not an active service on mercury.
define service{
    use                  12-hour-active-check
    host_name            mercury
    service_description  Log Schlep
    check_command        check_logs!mercury
    register             0
}
# Service groups that the escalation rules are attached to.
# "members" is a flat list of host,service pairs.
define servicegroup{
    servicegroup_name  mercury_noncrit
    alias              Mercury Non-Critical
    members            mercury, opt_space, mercury, disk_space, mercury, mysql_replication, mercury, check_rsync
}

define servicegroup{
    servicegroup_name  mercury_crit
    alias              Mercury Critical
    members            mercury, mysql_status
}
#####################################################
# Non-Critical Service Alerts
#   Daytime:     immediate e-mail
#                pagers if not acknowledged in 30 min.
#                e-mail every hour
#   After Hours: single e-mail
#####################################################

# Notification 1, any time: cld and digeo.
define serviceescalation{
    servicegroup_name      mercury_noncrit
    first_notification     1
    last_notification      1
    notification_interval  30
    escalation_period      24x7
    contact_groups         cld, digeo
}

# Notification 2, "days" period only: pagers.
# NOTE(review): this is the only escalation here without a
# notification_interval; confirm that is intended.
define serviceescalation{
    servicegroup_name      mercury_noncrit
    first_notification     2
    last_notification      2
    escalation_period      days
    contact_groups         pagers
}

# Notification 3 onwards (last_notification 0 = no upper bound), any time: cld.
# NOTE(review): the banner says "e-mail every hour" but the interval is 120;
# confirm which is intended (the unit depends on interval_length).
define serviceescalation{
    servicegroup_name      mercury_noncrit
    first_notification     3
    last_notification      0
    notification_interval  120
    escalation_period      24x7
    contact_groups         cld
}

# Notification 1, "nights" period only: cld.
define serviceescalation{
    servicegroup_name      mercury_noncrit
    first_notification     1
    last_notification      1
    notification_interval  60
    escalation_period      nights
    contact_groups         cld
}
#####################################################
# Critical Service Alerts
#   Daytime:     immediate page
#                immediate e-mail
#                hourly e-mails
#
#   After Hours: immediate e-mail
#                immediate page
#                follow-up page in 30 min.
#####################################################

# Notification 1, any time: cld and digeo. The interval of 1 schedules the
# next notification almost immediately.
define serviceescalation{
    servicegroup_name      mercury_crit
    first_notification     1
    last_notification      1
    notification_interval  1
    escalation_period      24x7
    contact_groups         cld, digeo
}

# Notification 2, "days" period only: pagers.
define serviceescalation{
    servicegroup_name      mercury_crit
    first_notification     2
    last_notification      2
    notification_interval  59
    escalation_period      days
    contact_groups         pagers
}

# Notification 3 onwards (last_notification 0 = no upper bound), "days"
# period only: cld.
define serviceescalation{
    servicegroup_name      mercury_crit
    first_notification     3
    last_notification      0
    notification_interval  60
    escalation_period      days
    contact_groups         cld
}

# Notifications 2 and 3, "nights" period only: pagers.
define serviceescalation{
    servicegroup_name      mercury_crit
    first_notification     2
    last_notification      3
    notification_interval  30
    escalation_period      nights
    contact_groups         pagers
}

# Notification 3, "nights" period only: cld (overlaps the pager rule above
# for notification 3).
define serviceescalation{
    servicegroup_name      mercury_crit
    first_notification     3
    last_notification      3
    notification_interval  60
    escalation_period      nights
    contact_groups         cld
}