Not receiving alerts (flapping?)
Posted: Fri Jan 30, 2015 1:43 pm
Guys, my Nagios server (Nagios Core 3.5.1) is not sending me alerts for certain services. I have configured the server to monitor Elasticsearch using some NRPE plugins. My client restarted the services during this time and complained that they didn't get any notifications for that period. While checking the logs, I can see that the service events were recorded in the log:
/var/log/nagios3/archives/nagios-01-20-2015-00.log
I have noted the word 'flapping' in the logs and searched more about it. I can see enable_flap_detection=1 is set in /etc/nagios3/nagios.cfg.
Does that mean the server detected the service status as flapping and therefore didn't send the notifications? In that case, how can I get at least one notification, such as 'flapping has started and may stop further alerts for this service'?
Following are my configurations :
/etc/nagios3/servers/Testserver1.cfg
vi /etc/nagios3/conf.d/g_generic.cfg
vi /etc/nagios3/conf.d/g_generic_service.cfg
vi /etc/nagios3/conf.d/g_contacts.cfg
Please let me know if any more details are required.
/var/log/nagios3/archives/nagios-01-20-2015-00.log
Code: Select all
[1421751380] Auto-save of retention data completed successfully.
[1421754980] Auto-save of retention data completed successfully.
[1421757490] SERVICE ALERT: Testserver2;Elastic_Search;WARNING;HARD;1;One or more indexes are missing replica shards. Use -vv to list them.
[1421757560] SERVICE ALERT: Testserver3;Elastic_Search;WARNING;HARD;1;One or more indexes are missing replica shards. Use -vv to list them.
[1421757670] SERVICE ALERT: Testserver1;Elastic_Search;WARNING;HARD;1;One or more indexes are missing replica shards. Use -vv to list them.
[1421757790] SERVICE ALERT: Testserver2;Elastic_Search;OK;HARD;1;Monitoring cluster 'production33'
[1421757860] SERVICE ALERT: Testserver3;Elastic_Search;OK;HARD;1;Monitoring cluster 'production33'
[1421758090] SERVICE ALERT: Testserver2;Elastic_Search;CRITICAL;HARD;1;[Errno 111] Connection refused
[1421758270] SERVICE ALERT: Testserver1;Elastic_Search;OK;HARD;1;Monitoring cluster 'production33'
[1421758390] SERVICE ALERT: Testserver2;Elastic_Search;OK;HARD;1;Monitoring cluster 'production33'
[1421758390] SERVICE FLAPPING ALERT: Testserver2;Elastic_Search;STARTED; Service appears to have started flapping (24.2% change >= 20.0% threshold)
[1421758580] Auto-save of retention data completed successfully.
[1421762180] Auto-save of retention data completed successfully.
[1421764090] SERVICE FLAPPING ALERT: Testserver2;Elastic_Search;STOPPED; Service appears to have stopped flapping (3.8% change < 5.0% threshold)
[1421765780] Auto-save of retention data completed successfully.
Does that mean the server detected the service status as flapping and therefore didn't send the notifications? In that case, how can I get at least one notification, such as 'flapping has started and may stop further alerts for this service'?
Following are my configurations :
/etc/nagios3/servers/Testserver1.cfg
define service{
use generic-service_dd ; Name of service template to use
host_name Testserver1
service_description Elastic_Search
check_command check_nrpe_1arg!check_elasticsearch
}
vi /etc/nagios3/conf.d/g_generic.cfg
Code: Select all
define host{
name generic-host_dd ; The name of this host template
notifications_enabled 1 ; Host notifications are enabled
event_handler_enabled 1 ; Host event handler is enabled
flap_detection_enabled 1 ; Flap detection is enabled
failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
check_command check-host-alive
max_check_attempts 10
notification_interval 0
notification_period 24x7
notification_options d,u,r
contact_groups gmail_group
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
}
vi /etc/nagios3/conf.d/g_generic_service.cfg
Code: Select all
# generic service template definition
define service{
name generic-service_dd ; The 'name' of this service template
active_checks_enabled 1 ; Active service checks are enabled
passive_checks_enabled 1 ; Passive service checks are enabled/accepted
parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems)
obsess_over_service 1 ; We should obsess over this service (if necessary)
check_freshness 0 ; Default is to NOT check service 'freshness'
notifications_enabled 1 ; Service notifications are enabled
event_handler_enabled 1 ; Service event handler is enabled
flap_detection_enabled 1 ; Flap detection is enabled
failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
notification_interval 0 ; Only send notifications on status change by default.
is_volatile 0
check_period 24x7
normal_check_interval 5
retry_check_interval 1
max_check_attempts 4
notification_period 24x7
notification_options w,u,c,r
contact_groups gmail_group
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
}
vi /etc/nagios3/conf.d/g_contacts.cfg
Code: Select all
define contact{
contact_name nagiosuser1
alias Root
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-service-by-email
host_notification_commands notify-host-by-email
email [email protected]
}
define contact{
contact_name nagiosuser2
alias Root
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-service-by-email
host_notification_commands notify-host-by-email
email [email protected]
}
define contact{
contact_name Tom
alias Root
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-service-by-email
host_notification_commands notify-host-by-email
email [email protected]
}
define contactgroup{
contactgroup_name gmail_group
alias client special contact Group
members nagiosuser1,nagiosuser2,Tom
}