Long Version: I have Veeam Backup and Recovery doing the backups of my Hyper-V VMs. The backup is scripted in PowerShell and has some neat features (cleanup, emailing me the status, backing up multiple VMs, etc.). Because it runs in PowerShell, I wanted to have Nagios trigger the backup using NRPE and wait for the status that the script will eventually return. That way, Nagios can email me in the event of a failure, rather than my having to check the backups daily. The issue I'm having is that the backup will undoubtedly take at least 1.5 hours every night (possibly up to two), so NRPE would have to wait that long for the script to return a status. Is this possible?
When I run a test PS Script, I get this:
Code: Select all
root@CAC-Nagios:/usr/local/nagios/libexec# ./check_nrpe -H 10.1.1.18
I (0.4.3.143 2015-04-29) seem to be doing fine...
root@CAC-Nagios:/usr/local/nagios/libexec# ./check_nrpe -H 10.1.1.18 -c backup_gabriel
SUCCESS: Everything is going to be fine!
Code: Select all
# This is the window for triggering the VEEAM backup job on CAC-NAS
# Intended window: 23:00 each night until 01:30 the following morning.
# A Nagios time range must lie within a single day (00:00-24:00, end later
# than start), so a range such as 23:00-01:30 that crosses midnight is not
# usable. Each day therefore lists the early-morning tail (00:00-01:30) and
# the late-evening start (23:00-24:00) as two separate ranges.
define timeperiod{
    timeperiod_name server_backup
    alias           Window for backups
    sunday          00:00-01:30,23:00-24:00
    monday          00:00-01:30,23:00-24:00
    tuesday         00:00-01:30,23:00-24:00
    wednesday       00:00-01:30,23:00-24:00
    thursday        00:00-01:30,23:00-24:00
    friday          00:00-01:30,23:00-24:00
    saturday        00:00-01:30,23:00-24:00
    }
Code: Select all
define service{
    # --- Template identity ---
    name                            backup-service  ; Name other service definitions reference via 'use'
    register                        0               ; Template only - not registered as a real service
    # --- How the check is executed ---
    active_checks_enabled           1               ; Nagios actively runs the check
    passive_checks_enabled          0               ; Passive check results are not accepted
    parallelize_check               1               ; Active checks run in parallel (disabling this can lead to major performance problems)
    obsess_over_service             0               ; Do not obsess over this service
    check_freshness                 0               ; Freshness checking is off
    is_volatile                     0               ; The service is not volatile
    failure_prediction_enabled      0               ; Failure prediction is off - wait for an actual failure
    # NOTE(review): parallelize_check and failure_prediction_enabled are believed
    # to be obsolete in Nagios Core 4 - confirm against the installed version.
    # --- Scheduling (intervals are in time units; 1 unit = 1 minute at the default interval_length of 60) ---
    check_period                    server_backup   ; Checks are only scheduled inside the 'server_backup' timeperiod
    max_check_attempts              1               ; A single attempt: the first result is already the final (hard) state
    normal_check_interval           360             ; 360 units (6 hours) between checks, well outside of the run window
    retry_check_interval            180             ; 180 units (3 hours) between re-checks; no re-checks occur while max_check_attempts is 1
    # --- Notifications ---
    notifications_enabled           1               ; Service notifications are enabled
    contact_groups                  admins          ; Notifications go to everyone in the 'admins' group
    notification_options            w,u,c,r         ; Notify on warning, unknown, critical, and recovery events
    notification_interval           1560            ; 1560 units (26 hours) between re-notifications, prevents repeat notifications
    notification_period             24x7            ; Notifications can be sent out at any time
    # --- Event handling, flapping, performance data ---
    event_handler_enabled           1               ; Service event handler is enabled
    flap_detection_enabled          0               ; Flap detection is disabled
    process_perf_data               1               ; Process performance data
    # --- State retention ---
    retain_status_information       1               ; Retain status information across program restarts
    retain_nonstatus_information    1               ; Retain non-status information across program restarts
    }
Code: Select all
# Veeam backup job on CAC-NAS, triggered through NRPE.
# NOTE(review): the backup is described as running 1.5-2 hours; confirm that the
# check_nrpe timeout and the Nagios service check timeout allow for that.
define service{
    service_description Veeam Backup
    host_name           CAC-NAS
    use                 backup-service              ; Inherit settings from the backup-service template
    check_command       check_nrpe!backup_gabriel   ; Run the 'backup_gabriel' command via NRPE
    }