Skip to main content

Nagios Core - Monitoring Setup

1294 words·
Nagios Core Monitoring Apache NRPE
Table of Contents

Prerequisites
#

For this tutorial I use the following setup:

# Nagios server: Ubuntu 22.04
192.168.30.90

# Linux host 1: Ubuntu 22.04
192.168.30.91

# Linux host 2: Ubuntu 22.04
192.168.30.92

Nagios Core
#

Install Nagios
#

# Update package index
sudo apt update

# Install dependencies
sudo apt install wget unzip vim curl openssl build-essential libgd-dev libssl-dev libapache2-mod-php php-gd php apache2 -y
# Check latest version
https://www.nagios.org/projects/nagios-core/4x/

# Change directory
cd /tmp

# Download
sudo wget https://assets.nagios.com/downloads/nagioscore/releases/nagios-4.4.14.tar.gz

# Unpack
sudo tar -xvzf nagios-4.4.14.tar.gz && cd nagios-4.4.14

# Run configuration
sudo ./configure --with-httpd-conf=/etc/apache2/sites-enabled

# Compile: All components defined in the Makefile
sudo make all

# Create nagios user & group
sudo make install-groups-users

# Add www-data to nagios group
sudo usermod -aG nagios www-data

# Install Nagios Core
sudo make install
# Create systemd service unit: Used to manage Nagios
sudo make install-daemoninit

# Install and configure the external command file
sudo make install-commandmode

# Install sample configuration
sudo make install-config

# Install Apache2 config files and configure settings
sudo make install-webconf
sudo a2enmod rewrite cgi

# Restart Apache2
sudo systemctl restart apache2

Admin User
#

# Create initial admin user: Prompt for pw
sudo htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin

# Create further users: Prompt for pw
sudo htpasswd /usr/local/nagios/etc/htpasswd.users another-user

Install Nagios Plugins
#

# Install prerequisites
sudo apt install autoconf gcc libc6 libmcrypt-dev make libssl-dev wget bc gawk dc build-essential snmp libnet-snmp-perl gettext -y
# change directory
cd /tmp

# Download
sudo wget https://github.com/nagios-plugins/nagios-plugins/releases/download/release-2.4.6/nagios-plugins-2.4.6.tar.gz

# Unpack
sudo tar -xvzf nagios-plugins-2.4.6.tar.gz && cd nagios-plugins-2.4.6


# Start configuration
sudo ./configure --with-nagios-user=nagios --with-nagios-group=nagios

# Install
sudo make && sudo make install

# Verify the configuration
sudo /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

Start Nagios
#

# Start Nagios
sudo systemctl start nagios

# Check status
sudo systemctl status nagios

# Check logs
sudo journalctl -xeu nagios.service

SSL Encryption
#

# Open configuration
sudo vi /etc/apache2/sites-available/default-ssl.conf
# /etc/apache2/sites-available/default-ssl.conf
<IfModule mod_ssl.c>
        <VirtualHost _default_:443>
                ServerAdmin webmaster@localhost
                DocumentRoot /var/www/html

                ErrorLog ${APACHE_LOG_DIR}/error.log
                CustomLog ${APACHE_LOG_DIR}/access.log combined

                SSLEngine on

                # Define Let's Encrypt Certificates
                SSLCertificateFile    /etc/certs/fullchain.pem
                SSLCertificateKeyFile /etc/certs/privkey.pem

                <FilesMatch "\.(cgi|shtml|phtml|php)$">
                                SSLOptions +StdEnvVars
                </FilesMatch>
                <Directory /usr/lib/cgi-bin>
                                SSLOptions +StdEnvVars
                </Directory>
        </VirtualHost>
</IfModule>
# Check configuration
sudo apache2ctl configtest

# Enable the SSL module
sudo a2enmod ssl

# Enable configuration
sudo a2ensite default-ssl.conf

# Restart Apache2
sudo systemctl restart apache2

Host
#

NRPE & Plugins
#

# Add hosts entry for Nagios Core server
sudo vi /etc/hosts

# Add Nagios Core server IP
192.168.30.90 nagios.jklug.work
# Update package index
sudo apt update

# Install NRPE (Nagios Remote Plugin Executor)
sudo apt install nagios-nrpe-server nagios-plugins -y
  • nagios-nrpe-server NRPE daemon used to run checks on remote host

  • nagios-plugins Monitoring plugins that are executed by NRPE

# Edit NRPE config
sudo vi /etc/nagios/nrpe.cfg
# Add Nagios Core server IP to config:
allowed_hosts=127.0.0.1,nagios.jklug.work,192.168.30.90

# Whether the NRPE daemon allows clients to pass arguments to the commands it executes: 0=do not allow arguments, 1=allow command arguments
dont_blame_nrpe=1

# Define the Nagios plugin commands that NRPE is allowed to run: Default
command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
command[check_load]=/usr/lib/nagios/plugins/check_load -r -w .15,.10,.05 -c .30,.25,.20
command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 250 -c 300
  • w Warning

  • c Critical

# Restart NRPE
sudo systemctl restart nagios-nrpe-server

# Check status
sudo systemctl status nagios-nrpe-server

Nagios Server
#

Test NRPE
#

# Install NRPE check
sudo apt install nagios-nrpe-plugin -y

# Check NRPE connection from nagios server to remote host
/usr/lib/nagios/plugins/check_nrpe -H 192.168.30.91 -c check_load
# Copy Nagios NRPE check
sudo cp /usr/lib/nagios/plugins/check_nrpe /usr/local/nagios/libexec/

# Otherwise you'll get the following Nagios error in the web interface:
(No output on stdout) stderr: execvp(/usr/local/nagios/libexec/check_nrpe, ...) failed. errno is 2: No such file or directory

Add NRPE check
#

  • Add NRPE check
# Open commands
sudo vi /usr/local/nagios/etc/objects/commands.cfg
# /usr/local/nagios/etc/objects/commands.cfg
define command {
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
    }

Main Configuration File
#

  • Define configuration files and folders
# Edit nagios.cfg
sudo vi /usr/local/nagios/etc/nagios.cfg
# nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/hostgroups.cfg
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/services.cfg
cfg_dir=/usr/local/nagios/etc/servers/
cfg_dir=/usr/local/nagios/etc/services/
  • Create configuration files and folders
# Create configuration files
sudo touch /usr/local/nagios/etc/objects/hostgroups.cfg &&
sudo touch /usr/local/nagios/etc/objects/hosts.cfg &&
sudo touch /usr/local/nagios/etc/objects/services.cfg

# Change owner
sudo chown nagios.nagios /usr/local/nagios/etc/objects/hostgroups.cfg &&
sudo chown nagios.nagios /usr/local/nagios/etc/objects/hosts.cfg &&
sudo chown nagios.nagios /usr/local/nagios/etc/objects/services.cfg

# Change permissions
sudo chmod 664 /usr/local/nagios/etc/objects/hostgroups.cfg &&
sudo chmod 664 /usr/local/nagios/etc/objects/hosts.cfg &&
sudo chmod 664 /usr/local/nagios/etc/objects/services.cfg


# Create directories for server (host) and service definitions, then change owner and permissions
sudo mkdir -p /usr/local/nagios/etc/{servers,services} &&
sudo chown nagios.nagios /usr/local/nagios/etc/{servers,services} &&
sudo chmod 775 /usr/local/nagios/etc/{servers,services}

Define Hostgroups
#

# Edit hostgroups
sudo vi /usr/local/nagios/etc/objects/hostgroups.cfg
# /usr/local/nagios/etc/objects/hostgroups.cfg
define hostgroup {
    hostgroup_name linux-group-1
    alias Linux Servers 1
    members ubuntu-1
    }

define hostgroup {
    hostgroup_name linux-group-2
    alias Linux Servers 2
    members ubuntu-2
    }

define hostgroup {
    hostgroup_name all-linux-servers
    alias All Linux Servers
    hostgroup_members linux-group-1,linux-group-2
    }

Define Host
#

Hosts describe machines that should be monitored; they consist of a short name, a descriptive name, and an IP address or host name. The following configuration defines a host and some basic checks for the host in the same configuration file. It is also possible to split the host and service definitions into separate files like hosts.cfg and services.cfg.

  • Host 1
# Add new server for monitoring
sudo vi /usr/local/nagios/etc/servers/ubuntu-1.cfg
# Define host
define host {
    host_name               ubuntu-1
    alias                   Ubuntu Server 1
    address                 192.168.30.91
    check_command           check-host-alive
    check_interval          1 ; Every minute
    retry_interval          1
    max_check_attempts      5
    check_period            24x7
    }

# Check ping
define service {
    use                     local-service
    host_name               ubuntu-1
    service_description     PING
    check_command           check_ping!100.0,20%!500.0,60%
    check_interval          1
    }

# Check load
define service {
    use                     local-service
    host_name               ubuntu-1
    service_description     Current Load
    check_command           check_nrpe!check_load
    check_interval          1
    }

# Check total processes
define service {
    use                     local-service
    host_name               ubuntu-1
    service_description     Total Processes
    check_command           check_nrpe!check_total_procs
    check_interval          1
    }

# Check current users
define service {
    use                     local-service
    host_name               ubuntu-1
    service_description     Current Users
    check_command           check_nrpe!check_users
    check_interval          1
    }

# Check total processes
define service {
    use                     local-service
    host_name               ubuntu-1
    service_description     Total Processes
    check_command           check_nrpe!check_total_procs
    check_interval          1
    }
  • Host 2
# Add new server for monitoring
sudo vi /usr/local/nagios/etc/servers/ubuntu-2.cfg
# Define host
define host {
    host_name               ubuntu-2
    alias                   Ubuntu Server 2
    address                 192.168.30.92
    check_command           check-host-alive
    check_interval          1 ; Every minute
    retry_interval          1
    max_check_attempts      5
    check_period            24x7
    }

# Check ping
define service {
    use                     local-service
    host_name               ubuntu-2
    service_description     PING
    check_command           check_ping!100.0,20%!500.0,60%
    }

# Check load
define service {
    use                     local-service
    host_name               ubuntu-2
    service_description     Current Load
    check_command           check_nrpe!check_load
    }

# Check total processes
define service {
    use                     local-service
    host_name               ubuntu-2
    service_description     Total Processes
    check_command           check_nrpe!check_total_procs
    }

# Check current users
define service {
    use                     local-service
    host_name               ubuntu-2
    service_description     Current Users
    check_command           check_nrpe!check_users
    }

# Check total processes
define service {
    use                     local-service
    host_name               ubuntu-2
    service_description     Total Processes
    check_command           check_nrpe!check_total_procs
    }

Check Configuration
#

# Check host config for errors
sudo /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

Restart Nagios
#

# Restart Nagios
sudo systemctl restart nagios

# Check status
sudo systemctl status nagios

# Check logs
sudo journalctl -xeu nagios.service

Nagios Webinterface
#

Login
#

# Open URL
https://nagios.jklug.work/nagios/

# User:
nagiosadmin

Monitoring
#


Services for several Hosts
#

Nagios Server
#

The following configurations define services that apply to all hosts in a host group.

  • HTTP Check
# Create config file
sudo vi /usr/local/nagios/etc/services/linux-group-1_www.cfg

# Change owner and permissions
sudo chown nagios.nagios /usr/local/nagios/etc/services/linux-group-1_www.cfg &&
sudo chmod 664 /usr/local/nagios/etc/services/linux-group-1_www.cfg
# Check http for hostgroup: linux-group-1
define service{
    hostgroup_name          linux-group-1
    service_description     HTTP
    check_command           check_http
    check_interval          1
    retry_interval          3
    max_check_attempts      3
    check_period            24x7
    }
  • SSH Check
# Create config file
sudo vi /usr/local/nagios/etc/services/all-linux-servers_ssh.cfg

# Change owner and permissions
sudo chown nagios.nagios /usr/local/nagios/etc/services/all-linux-servers_ssh.cfg &&
sudo chmod 664 /usr/local/nagios/etc/services/all-linux-servers_ssh.cfg
# Check SSH for hostgroup: all-linux-servers
define service {
    hostgroup_name          all-linux-servers
    service_description     SSH
    check_command           check_ssh
    check_interval          1
    retry_interval          3
    max_check_attempts      3
    check_period            24x7
    }

Check Configuration
#

# Check host config for errors
sudo /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

Restart Nagios
#

# Restart Nagios
sudo systemctl restart nagios

# Check status
sudo systemctl status nagios

# Check logs
sudo journalctl -xeu nagios.service

Nagios Webinterface
#