Basically, I have an old Nagios server that we're trying to get people off of, but in the meantime we're trying to keep it going. It works well for the most part, but I have some services that seem not to be notifying. Others on the box do notify, but I can't find the missing piece that is preventing ones like this from notifying. I'm presenting all the information for just one host/check to simplify. This one is in hard critical, but current_notification_number=0 (along with last/next notification, etc.).
Any help would be VERY much appreciated, as I've beaten my head against the wall on this for a goodly amount of time.
Sample logs:
Code: Select all
$ tail -100 /usr/local/nagios/var/nagios.log | grep 'REDACTED_HOSTNAME' | grep 'check_ssl_certificate'
[1556902693] EXTERNAL COMMAND: PROCESS_SERVICE_CHECK_RESULT;REDACTED_HOSTNAME;check_ssl_certificate;3;NRPE: Unable to read output
[1556902693] PASSIVE SERVICE CHECK: REDACTED_HOSTNAME;check_ssl_certificate;3;NRPE: Unable to read output
[1556902723] EXTERNAL COMMAND: PROCESS_SERVICE_CHECK_RESULT;REDACTED_HOSTNAME;check_ssl_certificate;3;NRPE: Unable to read output
[1556902723] PASSIVE SERVICE CHECK: REDACTED_HOSTNAME;check_ssl_certificate;3;NRPE: Unable to read output
nagios.cfg (cat nagios.cfg | grep -v -e '^#' -e '^$' | sort):
Code: Select all
accept_passive_host_checks=1
accept_passive_service_checks=1
additional_freshness_latency=15
admin_email=nagios@localhost
admin_pager=pagenagios@localhost
allow_empty_hostgroup_assignment=1
auto_reschedule_checks=0
auto_rescheduling_interval=30
auto_rescheduling_window=180
bare_update_check=0
broker_module=/usr/local/nagios/lib/mk-livestatus/livestatus.o /usr/local/nagios/var/rw/live max_response_size=1048576000
cached_host_check_horizon=15
cached_service_check_horizon=15
cfg_dir=/usr/local/nagios/etc/objects/contactgroups
cfg_dir=/usr/local/nagios/etc/objects/hostdependencies
cfg_dir=/usr/local/nagios/etc/objects/hostgroups
cfg_dir=/usr/local/nagios/etc/objects/hosts
cfg_dir=/usr/local/nagios/etc/objects/servicedependencies
cfg_dir=/usr/local/nagios/etc/objects/serviceescalations
cfg_dir=/usr/local/nagios/etc/objects/servicegroups
cfg_dir=/usr/local/nagios/etc/objects/services
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/hostescalations.cfg
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/notifycommands.cfg
cfg_file=/usr/local/nagios/etc/objects/parents.cfg
cfg_file=/usr/local/nagios/etc/objects/services.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
check_external_commands=1
check_for_orphaned_hosts=1
check_for_orphaned_services=1
check_for_updates=1
check_host_freshness=1
check_result_path=/usr/local/nagios/var/spool/checkresults
check_result_reaper_frequency=2
check_service_freshness=1
command_check_interval=-1
command_file=/usr/local/nagios/var/rw/nagios.cmd
daemon_dumps_core=0
date_format=us
debug_file=/usr/local/nagios/var/nagios.debug
debug_level=0
debug_verbosity=1
enable_embedded_perl=1
enable_environment_macros=1
enable_event_handlers=1
enable_flap_detection=1
enable_notifications= 1
enable_predictive_host_dependency_checks=1
enable_predictive_service_dependency_checks=1
event_broker_options=-1
event_handler_timeout=30
execute_host_checks=1
execute_service_checks=1
external_command_buffer_slots=8192
high_host_flap_threshold=20.0
high_service_flap_threshold=20.0
host_check_timeout=30
host_freshness_check_interval=180
host_inter_check_delay_method=n
host_perfdata_file_processing_command=rotate_host_perfdata
host_perfdata_file_processing_interval=60
host_perfdata_file_template=DATATYPE::HOSTPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tHOSTPERFDATA::$HOSTPERFDATA$\tHOSTCHECKCOMMAND::$HOSTCHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tLATENCY::$HOSTLATENCY$\tEXECUTIONTIME::$HOSTEXECUTIONTIME$
host_perfdata_file=/usr/local/nagios/var/spool/host-perfdata
illegal_macro_output_chars=`~$|'"<>
illegal_object_name_chars=`~!$%^&*|'"<>?,()=
interval_length=60
lock_file=/usr/local/nagios/var/nagios.lock
log_archive_path=/usr/local/nagios/var/archives
log_event_handlers=1
log_external_commands=1
log_file=/usr/local/nagios/var/nagios.log
log_host_retries=1
log_initial_states=1
log_notifications=1
log_passive_checks=1
log_rotation_method=d
log_service_retries=1
low_host_flap_threshold=5.0
low_service_flap_threshold=5.0
max_check_result_file_age=3600
max_check_result_reaper_time=20
max_concurrent_checks=0
max_debug_file_size=1000000
max_host_check_spread=30
max_service_check_spread=30
nagios_group=nagios
nagios_user=nagios
notification_timeout=30
object_cache_file=/dev/shm/objects.cache
obsess_over_hosts=0
obsess_over_services=0
ocsp_timeout=5
p1_file=/usr/local/nagios/bin/p1.pl
passive_host_checks_are_soft=0
perfdata_timeout=5
precached_object_file=/usr/local/nagios/var/objects.precache
process_performance_data=0
resource_file=/usr/local/nagios/etc/resource.cfg
retained_contact_host_attribute_mask=0
retained_contact_service_attribute_mask=0
retained_host_attribute_mask=0
retained_process_host_attribute_mask=0
retained_process_service_attribute_mask=0
retained_service_attribute_mask=0
retain_state_information=1
retention_update_interval=60
service_check_timeout=420
service_freshness_check_interval=1800
service_inter_check_delay_method=s
service_interleave_factor=s
service_perfdata_file_processing_command=rotate_service_perfdata
service_perfdata_file_processing_interval=60
service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$\tLATENCY::$SERVICELATENCY$\tEXECUTIONTIME::$SERVICEEXECUTIONTIME$
service_perfdata_file=/usr/local/nagios/var/spool/service-perfdata
sleep_time=0.25
soft_state_dependencies=0
state_retention_file=/usr/local/nagios/var/retention.dat
status_file=/dev/shm/status.dat
status_update_interval=10
temp_file=/usr/local/nagios/var/nagios.tmp
temp_path=/tmp
translate_passive_host_checks=0
use_aggressive_host_checking=0
use_embedded_perl_implicitly=1
use_large_installation_tweaks=1
use_regexp_matching=0
use_retained_program_state=1
use_retained_scheduling_info=1
use_syslog=1
use_timezone=US/Pacific
use_true_regexp_matching=0
Code: Select all
define service{
use new-service
name check_ssl_certificate
service_description check_ssl_certificate
hostgroup_name REDACTED_HOSTGROUP
check_command check_nrpe!check_ssl_certificate
}
define serviceextinfo{
hostgroup REDACTED_HOSTGROUP
service_description check_ssl_certificate
notes Check the SSL Certificate
notes_url REDACTED_RUNBOOK
}
define service{
use generic-service
host_name dummy-host-all-services
name new-service
initial_state o
contact_groups ops-alert
register 0
}
define service{
name generic-service
host_name dummy-host-all-services
active_checks_enabled 1
passive_checks_enabled 1
parallelize_check 1
obsess_over_service 1
check_freshness 1
freshness_threshold 21600
notifications_enabled 1
event_handler_enabled 1
flap_detection_enabled 1
failure_prediction_enabled 1
process_perf_data 1
retain_status_information 1
retain_nonstatus_information 1
is_volatile 0
check_period 24x7
max_check_attempts 5
normal_check_interval 10
retry_check_interval 2
contact_groups ops-alert
notification_options w,c
notification_interval 60
register 0
}
Code: Select all
define contactgroup{
contactgroup_name ops-alert
alias Operations Email Alerts
members alerting-contact
}
define contact{
contact_name alerting-contact
alias Default Alerts
email REDACTED_EMAIL
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r
host_notification_options d,u,r
service_notification_commands notify-by-email
host_notification_commands host-notify-by-email
}
status.dat extractions:
System status:
Code: Select all
info {
created=1556902932
version=3.5.1
last_update_check=1556886338
update_available=1
last_version=3.5.1
new_version=4.4.3
}
programstatus {
modified_host_attributes=1
modified_service_attributes=1
nagios_pid=13604
daemon_mode=1
program_start=1556838309
last_command_check=1556902927
last_log_rotation=1556866800
enable_notifications=1
active_service_checks_enabled=1
passive_service_checks_enabled=1
active_host_checks_enabled=1
passive_host_checks_enabled=1
enable_event_handlers=1
obsess_over_services=0
obsess_over_hosts=0
check_service_freshness=1
check_host_freshness=1
enable_flap_detection=1
enable_failure_prediction=1
process_performance_data=0
global_host_event_handler=
global_service_event_handler=
next_comment_id=311496
next_downtime_id=76679
next_event_id=4008742
next_problem_id=1992485
next_notification_id=819
total_external_command_buffer_slots=8192
used_external_command_buffer_slots=0
high_external_command_buffer_slots=164
active_scheduled_host_check_stats=0,0,0
active_ondemand_host_check_stats=8,162,506
passive_host_check_stats=0,0,0
active_scheduled_service_check_stats=1223,5870,17025
active_ondemand_service_check_stats=0,0,0
passive_service_check_stats=0,52,53
cached_host_check_stats=7,145,458
cached_service_check_stats=0,0,0
external_command_stats=0,105,323
parallel_host_check_stats=0,17,47
serial_host_check_stats=0,0,0
}
Code: Select all
hoststatus {
host_name=REDACTED_HOSTNAME
modified_attributes=0
check_command=check-host-alive
check_period=24x7
notification_period=24x7
check_interval=0.000000
retry_interval=1.000000
event_handler=
has_been_checked=1
should_be_scheduled=0
check_execution_time=0.118
check_latency=0.000
check_type=0
current_state=0
last_hard_state=0
last_event_id=1785143
current_event_id=1786291
current_problem_id=0
last_problem_id=889097
plugin_output=FPING OK - REDACTED_IP (loss=0%, rta=0.630000 ms)
long_plugin_output=
performance_data=loss=0%;80;100;0;100 rta=0.000630s;3.000000;5.000000;0.000000
last_check=1556697528
next_check=1556697528
check_options=0
current_attempt=1
max_attempts=3
state_type=1
last_state_change=1539414364
last_hard_state_change=1537830633
last_time_up=1556697531
last_time_down=1539411751
last_time_unreachable=0
last_notification=0
next_notification=0
no_more_notifications=0
current_notification_number=0
current_notification_id=0
notifications_enabled=1
problem_has_been_acknowledged=0
acknowledgement_type=0
active_checks_enabled=1
passive_checks_enabled=1
event_handler_enabled=1
flap_detection_enabled=1
failure_prediction_enabled=1
process_performance_data=1
obsess_over_host=1
last_update=1556902932
is_flapping=0
percent_state_change=0.00
scheduled_downtime_depth=0
}
Code: Select all
servicestatus {
host_name=REDACTED_HOSTNAME
service_description=check_ssl_certificate
modified_attributes=0
check_command=check_nrpe!check_ssl_certificate
check_period=24x7
notification_period=24x7
check_interval=10.000000
retry_interval=2.000000
event_handler=
has_been_checked=1
should_be_scheduled=1
check_execution_time=0.000
check_latency=0.998
check_type=1
current_state=3
last_hard_state=3
last_event_id=1786960
current_event_id=1828117
current_problem_id=911999
last_problem_id=890477
current_attempt=5
max_attempts=5
state_type=1
last_state_change=1539892984
last_hard_state_change=1539907244
last_time_ok=1539884611
last_time_warning=0
last_time_unknown=1556902743
last_time_critical=1539408795
plugin_output=NRPE: Unable to read output
long_plugin_output=
performance_data=
last_check=1556902743
next_check=1556893230
check_options=0
current_notification_number=0
current_notification_id=0
last_notification=0
next_notification=0
no_more_notifications=0
notifications_enabled=1
active_checks_enabled=1
passive_checks_enabled=1
event_handler_enabled=1
problem_has_been_acknowledged=0
acknowledgement_type=0
flap_detection_enabled=1
failure_prediction_enabled=1
process_performance_data=1
obsess_over_service=1
last_update=1556902932
is_flapping=0
percent_state_change=0.00
scheduled_downtime_depth=0
}
Code: Select all
contactstatus {
contact_name=alerting-contact
modified_attributes=0
modified_host_attributes=0
modified_service_attributes=0
host_notification_period=24x7
service_notification_period=24x7
last_host_notification=0
last_service_notification=1556902646
host_notifications_enabled=1
service_notifications_enabled=1
}