Support forum for Nagios Core, Nagios Plugins, NCPA, NRPE, NSCA, NDOUtils and more. Engage with the community of users including those using the open source solutions.
I have nagios core 4.0.2 installed on Linux - Scientific 6.4. I have about 70 RedHat workstation and server clients added to the hosts and localhosts.cfg files. I have all my services defined for each host and everything looks like it's supposed to.
The problem I'm having is that all my metrics are the same; I believe every host is reporting the stats of the local server that hosts Nagios Core. I can't seem to figure this out. I've been through all my configuration files numerous times, and I pulled in another sysadmin who has this working at his site, and he can't figure it out either. I'm sure it's a very simple fix and I'm just missing something, but being relatively new to Linux, this process has been very painful, with a very steep learning curve.
## Default linux Host Template ##
define host{
name linux-box ; Name of this template
use generic-host ; Inherit default values
check_period 24x7
check_interval 5
retry_interval 1
max_check_attempts 10
check_command check-host-alive
notification_period 24x7
notification_interval 30
notification_options d,r
contact_groups admins
register 0 ; DONT REGISTER THIS - ITS A TEMPLATE
}
## Default
define host{
use linux-box ; Inherit default values from a template
host_name localhost ; The name we're giving to this server
alias Red Hat Enterprise Linux 5.9 ; A longer name for the server
address 127.0.0.1 ; IP address of Remote Linux host
}
## mnlcfmaster1 ##
#define host{
#use generic-host
#host_name mlcfmaster1
#alias mlcfmaster1
#address 11.115.14.63
#}
## mnlcfmaster2 ##
define host{
use generic-host
host_name mlcfmaster2
alias mlcfmaster2
address 11.115.14.64
}
##############
## LOCALHOST ##
###############
define service{
use local-service ; Name of service template to use
host_name localhost
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use local-service ; Name of service template to use
host_name localhost
service_description Root Partition
check_command check_local_disk!20%!10%!/
}
define service{
use local-service ; Name of service template to use
host_name localhost
service_description Current Users
check_command check_local_users!20!50
}
define service{
use local-service ; Name of service template to use
host_name localhost
service_description Total Processes
check_command check_local_procs!250!400!RSZDT
}
define service{
use local-service ; Name of service template to use
host_name localhost
service_description Current Load
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
define service{
use local-service ; Name of service template to use
host_name localhost
service_description Swap Usage
check_command check_local_swap!20!10
}
define service{
use local-service ; Name of service template to use
host_name localhost
service_description SSH
check_command check_ssh
notifications_enabled 0
}
define service{
use local-service ; Name of service template to use
host_name localhost
service_description HTTP
check_command check_tcp!80
notifications_enabled 0
}
define service{
use generic-service
host_name localhost
service_description CPU Load
check_command check_nrpe!check_load
}
#define service{
# use generic-service
# host_name localhost
# service_description Total Processes
# check_command check_nrpe!check_total_procs
# }
define service{
use generic-service
host_name localhost
service_description Current Users
check_command check_nrpe!check_users
}
##################
## mlcfmaster1 ##
##################
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster1
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster1
service_description Root Partition
check_command check_local_disk!20%!10%!/
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster1
service_description Current Users
check_command check_local_users!20!50
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster1
service_description Total Processes
check_command check_local_procs!250!400!RSZDT
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster1
service_description Current Load
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster1
service_description Swap Usage
check_command check_local_swap!20!10
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster1
service_description SSH
check_command check_ssh
notifications_enabled 0
}
##################
## mlcfmaster2 ##
##################
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster2
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster2
service_description Root Partition
check_command check_local_disk!20%!10%!/
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster2
service_description Current Users
check_command check_local_users!20!50
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster2
service_description Total Processes
check_command check_local_procs!250!400!RSZDT
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster2
service_description Current Load
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster2
service_description Swap Usage
check_command check_local_swap!20!10
}
define service{
use local-service ; Name of service template to use
host_name mnlcfmaster2
service_description SSH
check_command check_ssh
notifications_enabled 0
}
Also, I do have the /localhosts.cfg file configured with all the client machines and their IPs. However, in nagios.cfg I have this line commented out, as shown below. Unless I comment this line out, I cannot get the service to start or restart (it reports errors).
# OBJECT CONFIGURATION FILE(S)
# These are the object configuration files in which you define hosts,
# host groups, contacts, contact groups, services, etc.
# You can split your object definitions across several config files
# if you wish (as shown below), or keep them all in a single config file.
# You can specify individual object config files as shown below:
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
cfg_file=/usr/local/nagios/etc/objects/templates.cfg
#cfg_file=/usr/local/nagios/etc/hosts.cfg
cfg_file=/usr/local/nagios/etc/services.cfg
# Definitions for monitoring the local (Linux) host
cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
# Definitions for monitoring a Windows machine
cfg_file=/usr/local/nagios/etc/objects/windows.cfg
# Definitions for monitoring a router/switch
#cfg_file=/usr/local/nagios/etc/objects/switch.cfg
# Definitions for monitoring a network printer
cfg_file=/usr/local/nagios/etc/objects/printer.cfg
You are using the "local-service" templates for your remote system checks. The local-service template should only be used for localhost. You will need to use nrpe or another agent of your choice to check the remote systems. Use the "generic-service" template for the remote service checks as well. http://nagios.sourceforge.net/docs/nrpe/NRPE.pdf
Former Nagios employee
"It is turtles. All. The. Way. Down. . . .and maybe an elephant or two."
VI VI VI - The editor of the Beast!
Come to the Dark Side.
abrist wrote:You are using the "local-service" templates for your remote system checks. The local-service template should only be used for localhost. You will need to use nrpe or another agent of your choice to check the remote systems. Use the "generic-service" template for the remote service checks as well. http://nagios.sourceforge.net/docs/nrpe/NRPE.pdf
Ok I modified my templates and changed the local-service to generic-service. I also ensured on a couple clients that I can do a check_nrpe and I get the nagios version returned back to me. This points to nrpe being installed correctly on the remote linux client as I am doing the check_nrpe from the server.
Still... The machines are all reporting the localhost metrics. I did restart the service after making all the changes.
Sorry to have to ask again, but can you get a fresh copy of one of the remote host configurations, including its host definition and service definitions? Though they are lengthy, we really need to take a look at a full example. Thanks a ton!
Sorry it has taken so long for me to get back to everyone. Here are the examples. I still have not been able to get this working. I have a fresh install of Nagios 4.0.2 on Scientific Linux 6.4 x64. The web front-end is up and running. The two hosts defined in my hosts file are configured with NRPE, and I can do a check_nrpe on them and it pulls the version.
I can start nagios with no errors or when I verify nagios before starting the service I get no errors. You can see I am using the "generic-service" template now as well.
Still, all the metrics being reported in Nagios are for the local host.
################
# CFMASTER1 #
################
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description Root Partition
check_command check_local_disk!20%!10%!/
}
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description Current Users
check_command check_local_users!20!50
}
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description Total Processes
check_command check_local_procs!250!400!RSZDT
}
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description Current Load
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description Swap Usage
check_command check_local_swap!20!10
}
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description SSH
check_command check_ssh
notifications_enabled 0
}
#############
# LINUX01 #
#############
define service{
use generic-service ; Name of service template to use
host_name linux01
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use generic-service ; Name of service template to use
host_name linux01
service_description Root Partition
check_command check_local_disk!20%!10%!/
}
define service{
use generic-service ; Name of service template to use
host_name linux01
service_description Current Users
check_command check_local_users!20!50
}
define service{
use generic-service ; Name of service template to use
host_name linux01
service_description Total Processes
check_command check_local_procs!250!400!RSZDT
}
define service{
use generic-service ; Name of service template to use
host_name linux01
service_description Current Load
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
define service{
use generic-service ; Name of service template to use
host_name mdlinux01
service_description Swap Usage
check_command check_local_swap!20!10
}
define service{
use generic-service ; Name of service template to use
host_name linux01
service_description SSH
check_command check_ssh
notifications_enabled 0
}
Doing some trial and error, I figured out that any changes I make to localhost.cfg are reflected across all my remote hosts. So even though my services.cfg defines "generic-service" and my hosts.cfg defines "generic-host", it is still using the localhost.cfg file for all my hosts.
I'm not sure how to rectify this. Can someone please help?
Yep. Your issue is that you are using the "check_local_*" service checks to check your remote systems. If you look at the command definitions, you will notice that the check_local_* commands all check the local host. You need to remove the string "_local" from the check_command values in your service definitions for the remote hosts.
For example, change the first definition below into the second (check_local_users becomes check_users):
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description Current Users
check_command check_local_users!20!50
}
define service{
use generic-service ; Name of service template to use
host_name cfmaster1
service_description Current Users
check_command check_users!20!50
}
Otherwise, all checks are run against localhost instead of the remote host. A word of warning: do not change the check_local_* command definitions themselves, or your localhost checks will stop working. Just change the check_command directive in the service definitions for the remote hosts.
Former Nagios employee
"It is turtles. All. The. Way. Down. . . .and maybe an elephant or two."
VI VI VI - The editor of the Beast!
Come to the Dark Side.