this is my test case. In my real world i have a few linux boxes and some active checks which uses ssh. If the ssh daemon fails, i get also notified for all the other dozend services which USE ssh.
Now i would like to define a Host Group and Assign Services to it. Here we go:
====================================================================================
Code: Select all
## Host definition.
# This is a normal linux box
define host{
use ha-linux-server
host_name MarioTest
address mario.example.com
hostgroups basic_linux_server
}
# Define Hostgroup
define hostgroup{
hostgroup_name basic_linux_server
}
## check disk by ssh
define service{
use TLS_ha-generic-service
service_description Basic Linux Service Disk Check
check_command TLS_check_disk_by_ssh!15%!10%!-x /backup!
hostgroup basic_linux_server
servicegroups ssh_driven_checks
}
# check ssh service itself
define service{
use TLS_ha-generic-service ; Name of service template to use
service_description Basic Linux Service SSH Check
check_command TLS_check_ssh
hostgroup basic_linux_server
normal_check_interval 1
retry_check_interval 1
}
define servicegroup{
servicegroup_name ssh_driven_checks
alias ssh_driven_checks
}
#
# => this seems to the problem!!!!
# If the „Basic Linux Service SSH Check“ fails, the other checks which use ssh should be ignored
#
# => in this config i still get notifications for the disk service if ssh fails
define servicedependency{
service_description Basic Linux Service SSH Check
dependent_servicegroup_name ssh_driven_checks
execution_failure_criteria w,u,c
notification_failure_criteria w,u,c
}
# commands
define command{
command_name TLS_check_disk_by_ssh
command_line openssh root@$HOSTADDRESS$ "/usr/lib/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -x /dev -x /proc $ARG3$"
}
define command{
command_name TLS_check_ssh
command_line $USER1$/check_ssh $ARG1$ $HOSTADDRESS$
}
# templates
define host{
name TLS_ha-linux-server ; The name of this host template
use generic-host ; This template inherits other values from the generic-host template
check_period 24x7 ; By default, Linux hosts are checked round the clock
check_interval 2 ; Actively check the host every 5 minutes
retry_interval 1 ; Schedule host check retries at 1 minute intervals
max_check_attempts 3 ; Check each Linux host 10 times (max)
check_command check-host-alive ; Default command to check Linux hosts
notification_period 24x7 ; Linux admins hate to be woken up, so we only notify during the day
; Note that the notification_period variable is being overridden from
; the value that is inherited from the generic-host template!
notification_interval 30 ; Resend notifications every 2 hours
notification_options d,r ; Only send notifications for specific host states
contact_groups TLS_admins-email, TLS_admins-sms
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
icon_image dot.png' alt="" border="0"></a><A TARGET="_blank" HREF="graphs.cgi?host=$HOSTNAME$&service=check-host-alive"><img src="/nagios/images/logos/graph.png"
}
define service{
name TLS_ha-generic-service ; The 'name' of this service template
active_checks_enabled 1 ; Active service checks are enabled
passive_checks_enabled 1 ; Passive service checks are enabled/accepted
parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems)
obsess_over_service 1 ; We should obsess over this service (if necessary)
check_freshness 0 ; Default is to NOT check service 'freshness'
notifications_enabled 1 ; Service notifications are enabled
event_handler_enabled 1 ; Service event handler is enabled
flap_detection_enabled 0 ; Flap detection is enabled
failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
is_volatile 0 ; The service is not volatile
check_period 24x7 ; The service can be checked at any time of the day
max_check_attempts 5 ; Re-check the service up to 3 times in order to determine its final (hard) state
normal_check_interval 2 ; Check the service every 10 minutes under normal conditions
retry_check_interval 2 ; Re-check the service every two minutes until a hard state can be determined
contact_groups TLS_admins-email, TLS_admins-sms
notification_options w,u,c,r ; Send notifications about warning, unknown, critical, and recovery events
notification_interval 30 ; Re-notify about service problems every hour
notification_period 24x7 ; Notifications can be sent out at any time
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
}
Why does the dependency not work? Why do i get notified for all related services?
Thanks a lot,
Mario