Page 1 of 1

Why am I not being contacted?

Posted: Sat Aug 08, 2020 6:35 pm
by bigcrater
nagios 4.4.6
nagios-plugins 2.3.3
Hardware -- raspberry pi

I am not being contacted. I deliberately put in a check_by_ssh on
a service that does not exist so that it should always fail.
I thought I had set this up so that I would be contacted in the event
of failure (and failure is certain). The web interface does indeed
say that the service is down. But I am not contacted.

I want this to turn on an LED. The command to do so does work -- ie,
the LED comes on.

I've trimmed all my files down to just the one service that isn't working.

Code: Select all

[root@cyril etc]# ../bin/nagios -v nagios.cfg 

Nagios Core 4.4.6
Copyright (c) 2009-present Nagios Core Development Team and Community Contributors
Copyright (c) 1999-2009 Ethan Galstad
Last Modified: 2020-04-28
License: GPL

Website: https://www.nagios.org
Reading configuration data...
   Read main config file okay...
   Read object config files okay...

Running pre-flight check on configuration data...

Checking objects...
	Checked 1 services.
	Checked 2 hosts.
	Checked 2 host groups.
	Checked 0 service groups.
	Checked 1 contacts.
	Checked 1 contact groups.
	Checked 3 commands.
	Checked 5 time periods.
	Checked 0 host escalations.
	Checked 0 service escalations.
Checking for circular paths...
	Checked 2 hosts
	Checked 0 service dependencies
	Checked 0 host dependencies
	Checked 5 timeperiods
Checking global event handlers...
Checking obsessive compulsive processor commands...
Checking misc settings...

Total Warnings: 0
Total Errors:   0

Things look okay - No serious problems were detected during the pre-flight check
commands.cfg

Code: Select all

# Check command that is meant to fail every time: it runs pgrep over SSH
# looking for a process ("dietslurm") that does not exist on the target.
# The whole command_line must stay on ONE physical line; a terminal-wrapped
# copy (split in the middle of /usr/bin/pgrep) is not a valid directive.
# NOTE(review): the target host is hard-coded here instead of coming from a
# host macro; that is fine only while the service is attached to this one host.
define command {
        command_name    always_fail
        command_line    $USER1$/check_by_ssh -H ws.jgscrater.com -l root -C "/usr/bin/pgrep -alf dietslurm"
}

# Notification command: runs the script that switches the LED on.
# It is not used as a check; it is referenced only by the
# service_notification_commands / host_notification_commands directives
# of the led-contact contact template.
# NOTE(review): the script is launched by the Nagios daemon's own user account,
# so that account must be permitted to execute /opt/bin/aon.sh and to do
# whatever the script does -- confirm permissions when notifications "do nothing".
define command {
        command_name    turn_on_led
        command_line    /opt/bin/aon.sh
}
#       definition of check-host-alive omitted for clarity.
services.cfg

Code: Select all

# The single test service: attached to every member of the "workstation"
# hostgroup and checked with the deliberately failing always_fail command.
# Contacts, notification options and check timing are not set here; they are
# inherited through the local-service template.
define service {
        use                     local-service
        hostgroup_name          workstation
        service_description     seeking new diet slurm
        check_command           always_fail
}
hostgroups.cfg

Code: Select all

# The monitored workstation. Check and notification settings (including the
# contact group) are inherited from the linux-server host template.
define host {
        use     linux-server
        host_name       ws.jgscrater.com
        alias           ws
        address         10.1.1.3
}
# Hostgroup that the test service is attached to; it currently has exactly
# one member, so the service exists on ws.jgscrater.com only.
define hostgroup {
        hostgroup_name  workstation
        members         ws.jgscrater.com
}
contacts.cfg

Code: Select all

# The only real contact. Notification periods, options and commands are
# inherited from the led-contact template, so "notifying" this contact means
# running turn_on_led rather than sending mail.
define contact {
    contact_name            nagiosadmin
    use                     led-contact
    alias                   Nagios Admin
    email                   [email protected]
}
# Contact group named by the contact_groups directive of the linux-server
# host template and the generic-service service template; its sole member
# is nagiosadmin.
define contactgroup {
    contactgroup_name       jgscrater_admins
    alias                   Nagios Administrators
    members                 nagiosadmin
}

templates.cfg

Code: Select all

# Contact template: contacts that "use led-contact" are notified at any time
# (24x7) for every listed host and service notification type, and the
# notification itself is the turn_on_led command for both hosts and services.
# This definition has only a template "name" (no contact_name), so it is
# marked register 0 like every other template in this file; the register
# directive is not inherited, so contacts derived from it are still registered.
define contact {
    name                            led-contact
    service_notification_period     24x7
    host_notification_period        24x7
    service_notification_options    w,u,c,r,f,s
    host_notification_options       d,u,r,f,s
    service_notification_commands   turn_on_led
    host_notification_commands      turn_on_led
    register                        0
}
# Base host template (register 0: a template, never a real host).
# It enables notifications, event handlers, flap detection, performance data
# and state retention, and sets a 24x7 notification period.
# Note that linux-server, which derives from this template, replaces the
# notification_period with "workhours".
define host {

    name                            generic-host
    notifications_enabled           1
    event_handler_enabled           1
    flap_detection_enabled          1
    process_perf_data               1
    retain_status_information       1
    retain_nonstatus_information    1
    notification_period             24x7
    register                        0
}

# Host template used by ws.jgscrater.com; builds on generic-host and adds the
# check schedule, the check command and the notification settings.
# The check-host-alive command and the "workhours" time period are defined
# outside the snippets shown here.
define host {
    name                            linux-server
    use                             generic-host
    check_period                    24x7
    check_interval                  5
    retry_interval                  1
    max_check_attempts              10
    check_command                   check-host-alive
    # Overrides the 24x7 value inherited from generic-host, so HOST
    # notifications are limited to the workhours period. Service
    # notifications take their period from the service templates instead.
    notification_period             workhours
    notification_interval           120
    notification_options            d,u,r
    contact_groups                  jgscrater_admins
    register                        0
}
# Base service template (register 0: a template, never a real service).
# This is where services get their contacts (jgscrater_admins), notification
# options, notification interval and 24x7 notification period.
# local-service, which derives from this template, overrides
# max_check_attempts, check_interval and retry_interval.
define service {
    name                            generic-service
    active_checks_enabled           1
    passive_checks_enabled          1
    parallelize_check               1
    obsess_over_service             1
    check_freshness                 0
    notifications_enabled           1
    event_handler_enabled           1
    flap_detection_enabled          1
    process_perf_data               1
    retain_status_information       1
    retain_nonstatus_information    1
    is_volatile                     0
    check_period                    24x7
    max_check_attempts              3
    check_interval                  10
    retry_interval                  2
    # Who gets notified for any service built on this template.
    contact_groups                  jgscrater_admins
    notification_options            w,u,c,r
    notification_interval           60
    notification_period             24x7
    register                        0
}
# Service template used by the test service; builds on generic-service and
# only changes the check timing (4 attempts, check every 5, retry every 1,
# replacing generic-service's 3 / 10 / 2).
define service {
        name                    local-service
        use                     generic-service
        max_check_attempts      4
        check_interval          5
        retry_interval          1
        register                0
        # Left commented out: contact_groups is already inherited from
        # generic-service with the same value.
        #contact_groups         jgscrater_admins
}

Re: Why am I not being contacted?

Posted: Mon Aug 10, 2020 1:13 pm
by bigcrater
It was difficult to find. I have a workaround by changing the mode
of the notifying binary to 4755. I'm not crazy about that workaround
so I may see if I can find a better fix.

Re: Why am I not being contacted?

Posted: Fri Aug 14, 2020 10:37 am
by benjaminsmith
Hi @bigcrater,

Cool project! Glad to hear you found a workaround. The nagios user account is going to need group permissions to run the notification command, but you can still restrict the permissions for all other users.